/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

/* Version identification string: a CVS/RCS "$Id$" keyword behind an
   SCCS-style "@(#)" prefix.  Not referenced elsewhere in the part of
   the file reviewed here. */
static const char __idstring[] = "@(#)$Id: mx_instance.c,v 1.269.2.10 2006/12/14 15:48:36 patrick Exp $";

#include "mx_arch.h"
#include "mx_instance.h"
#include "mx_malloc.h"
#include "mx_misc.h"
#include "mx_peer.h"
#include "mx_pio.h"
#include "mcp_config.h"
#include "mx_stbar.h"
#include "mx_util.h"

/* Table of attached boards; mx_instance_init() stores each instance at
   index is->id. */
mx_instance_state_t **mx_instances;
/* NOTE(review): presumably the capacity of mx_instances[]; it is not
   referenced in the part of the file reviewed here -- confirm. */
uint32_t mx_max_instance = MX_MAX_INSTANCE_DEFAULT;
/* Number of boards registered in mx_instances[]; incremented by
   mx_instance_init(). */
uint32_t mx_num_instances;
/* Sent to the MCP as "nodes_cnt"; also sizes the per-port host route
   table and route offset array (mx_alloc_route). */
uint32_t mx_max_nodes = MX_MCP_NODES_CNT;
/* Sent to the MCP as "endpoints_cnt"; also sizes is->es[] and the RDMA
   window page count. */
uint32_t mx_max_endpoints = MX_MCP_ENDPOINTS_CNT;
/* The two light-endpoint limits are not referenced in the part of the
   file reviewed here. */
uint32_t mx_min_light_endpoints = MX_MCP_LIGHT_ENDPOINTS_MIN;
uint32_t mx_max_light_endpoints = MX_MCP_LIGHT_ENDPOINTS_MAX;
/* Sent to the MCP as "send_handles_cnt", "pull_handles_cnt" and
   "push_handles_cnt" respectively (mx_init_board). */
uint32_t mx_max_send_handles = MX_MCP_SEND_HANDLES_CNT;
uint32_t mx_max_pull_handles = MX_MCP_PULL_HANDLES_CNT;
uint32_t mx_max_push_handles = MX_MCP_PUSH_HANDLES_CNT;
/* Sent to the MCP as "rdma_windows_cnt"; also used to compute the
   number of RDMA window pages (mx_alloc_rdmawin_vpages). */
uint32_t mx_max_rdma_windows = MX_MCP_RDMA_WINDOWS_CNT;
/* Cache line size in bytes.  When 0, mx_instance_init() sets it to
   4 * the board's PCI Cache_Line_Size register. */
uint32_t mx_cacheline_size = 0;
/* Sent to the MCP as "intr_coal_delay" (mx_init_board). */
uint32_t mx_intr_coal_delay = MX_MCP_INTR_COAL_DELAY;
/* When nonzero, mx_select_board_type() treats PCI revision 5 as
   revision 6. */
uint32_t mx_override_e_to_f = 0;
/* The following five are not referenced in the part of the file
   reviewed here. */
uint32_t mx_simulate_parity_error = 0;
uint32_t mx_z_loopback = 0;
uint32_t mx_pcie_down = MX_RDMA_FC;
uint32_t mx_parity_recovery = 0;
uint32_t mx_recover_from_all_errors = 0;
uint32_t mx_max_host_queries = 0;
/* Copied (in network byte order) into the MCP globals field
   pcie_down_on_error by mx_init_board(). */
uint32_t mx_pcie_down_on_error = 1;

/* Number of ports per board, indexed by is->board_type
   (see mx_select_board_type). */
static int mx_num_ports[3] = {1, 2, 1};

/*
 * Work out which kind of board we are driving from its PCI ids and
 * fill in is->board_type, is->num_ports and is->board_ops accordingly.
 *
 * Returns 0 on success.  Returns ENODEV (after logging a warning) when
 * the vendor/device/revision triple is not recognized, or when the
 * matching board_ops table has no map routine, i.e. support for that
 * board family was compiled out.
 */
static int
mx_select_board_type(mx_instance_state_t *is, int vendor_id, 
		     int device_id, int pci_revision)
{
  int recognized = 0;

  if (vendor_id == MX_PCI_VENDOR_MYRICOM) {
    if (device_id == MX_PCI_DEVICE_MYRINET) {
      /* optional override: handle revision 5 as if it were revision 6 */
      if (mx_override_e_to_f && pci_revision == 5)
        pci_revision = 6;

      if (pci_revision == 4 || pci_revision == 6) {
        is->board_type = MX_BOARD_TYPE_D;
        recognized = 1;
      } else if (pci_revision == 5) {
        is->board_type = MX_BOARD_TYPE_E;
        recognized = 1;
      } else if (pci_revision == 7) {
        is->board_type = MX_BOARD_TYPE_Z;
        recognized = 1;
      }
    } else if (device_id == MX_PCI_DEVICE_Z4E
               || device_id == MX_PCI_DEVICE_Z8E) {
      is->board_type = MX_BOARD_TYPE_Z;
      recognized = 1;
    }
  }

  if (recognized) {
    is->num_ports = mx_num_ports[is->board_type];

    /* pick the table of routines which vary by board type */
    if (is->board_type == MX_BOARD_TYPE_Z) {
      is->board_ops = mx_lz_ops;
    } else if (is->board_type == MX_BOARD_TYPE_D
               || is->board_type == MX_BOARD_TYPE_E) {
      is->board_ops = mx_lx_ops;
    }

    /* a table without a map routine means support for this type of
       board was compiled out: refuse to load */
    if (is->board_ops.map != NULL)
      return 0;
  }

  MX_WARN(("Unknown/Unsupported board type 0x%x:0x%x rev %d\n",
           vendor_id, device_id, pci_revision));
  return ENODEV;
}

    
/*
 * Undo the PCI-X command register change recorded by
 * mx_check_for_pcix_rbc(), if there was one.  The "tweaked" flag is
 * cleared before the write, so the restore is attempted only once even
 * when the config-space write fails.
 */
static void
mx_restore_pci_cap(mx_instance_state_t *is)
{
  if (!is->pci_cap.command_tweaked)
    return;

  is->pci_cap.command_tweaked = 0;
  if (mx_write_pci_config_16(is, is->pci_cap.command_offset,
                             is->pci_cap.command_orig_val) != 0) {
    MX_WARN(("Unable to restore PCI-X capability register to oringal value\n"));
  }
}

/*
 * Walk the PCI capability list of the board looking for capability
 * cap_id.
 *
 * Returns the config-space offset of the capability, or 0 when the
 * device has no capability list, the capability is not present, a
 * config-space access fails, or the list is malformed.
 *
 * Capabilities live at dword-aligned offsets in 0x40..0xfc, so a
 * well-formed list has at most 48 entries.  The walk is bounded by that
 * count so that a circular (corrupt) list cannot hang the caller.
 */
int 
mx_find_capability(mx_instance_state_t *is, unsigned cap_id)
{
  uint8_t cap = 0;
  uint8_t id = 0;
  uint16_t status;
  int ttl = 48;  /* (0x100 - 0x40) / 4 possible capability slots */

  if (mx_read_pci_config_16(is, MX_PCI_STATUS, &status) != 0
      || !MX_PCI_STATUS_CAPABILITIES_LIST(status)) {
    MX_WARN(("%d:mx_find_capability:No cap list!!!\n", is->id));
    return 0;
  }
  if (mx_read_pci_config_8(is, offsetof(mx_pci_config_t, Cap_List), &cap) != 0) {
    MX_WARN(("%d:mx_find_capability:config-space failure\n", is->id));
    return 0;
  }
  while (cap != 0) {
    if (ttl-- <= 0) {
      MX_WARN(("%d:mx_find_capability:capability list does not terminate\n",
               is->id));
      return 0;
    }
    /* pointers into the standard header (< 0x40) or all-ones reads
       cannot be valid capability offsets */
    if (cap == 0xff || cap < 0x40) {
      MX_WARN(("%d:invalid cap list, found cap-ptr = 0x%x\n", is->id, cap));
      return 0;
    }
    /* the two low bits of a capability pointer are reserved */
    cap &= 0xfc;
    if (mx_read_pci_config_8(is, cap, &id)) {
      MX_WARN(("%d:mx_find_capability:config-space failure\n", is->id));
      return 0;
    }
    if (id == cap_id)
      return cap;
    /* byte 1 of every capability is the pointer to the next one */
    if (mx_read_pci_config_8(is, cap + 1, &cap) != 0) {
      MX_WARN(("%d:mx_find_capability:config-space failure\n", is->id));
      return 0;
    }
  }
  return 0;
}

/*
 * Find out whether MSI was enabled for this board.  If the board has an
 * MSI capability and the enable bit of its Message_Control word is set,
 * is->using_msi is set to 1.
 *
 * Returns 0, or the error from the config-space read of
 * Message_Control.
 */
static int
mx_check_for_msi(mx_instance_state_t *is)
{
  uint16_t msg_ctl;
  uint8_t cap_off;
  int rc;

  cap_off = mx_find_capability(is, MX_PCI_CAP_MESSAGE_SIGNALLED_INTERRUPTS);
  if (!cap_off)
    return 0;

  /* Message_Control holds the enable bit we are interested in */
  MX_DEBUG_PRINT (MX_DEBUG_BOARD_INIT, ("Reading Message_Control.\n"));
  rc = mx_read_pci_config_16(is,
                             cap_off + offsetof(mx_pci_capability_t,
                                                msi.Message_Control),
                             &msg_ctl);
  if (rc != 0) {
    MX_WARN (("Could not read msi.Message_Control\n"));
    return rc;
  }

  MX_DEBUG_PRINT (MX_DEBUG_BOARD_INIT, ("Checking MSI enable.\n"));
  if (MX_PCI_CAP_MESSAGE_CONTROL_MSI_ENABLE (msg_ctl)) {
    /* the OS told the card to use MSI */
    is->using_msi = 1;
  } else {
    /* the OS told the card not to use MSI */
    MX_DEBUG_PRINT (MX_DEBUG_BOARD_INIT, ("OS disabled MSI\n"));
  }

  return 0;
}

  
/*
 * Inspect the PCI-X "maximum memory read byte count" setting and, when
 * MX_PCI_X_FORCE_LARGE_READ is set and the value is below 2048 bytes,
 * raise it to 2048.  The original command word and its offset are saved
 * in is->pci_cap so that mx_restore_pci_cap() can put it back.
 *
 * Returns 0 (also when the board has no PCI-X capability), or the error
 * of the failing config-space access.
 */
static int
mx_check_for_pcix_rbc(mx_instance_state_t *is)
{
  uint16_t cmd, rbc;
  uint8_t cap_off;
  int rc;

  cap_off = mx_find_capability(is, MX_PCI_CAP_PCI_X);
  if (!cap_off)
    return 0;

  MX_DEBUG_PRINT (MX_DEBUG_BOARD_INIT, ("Found a PCI-X capability\n"));

  /* fetch the PCI-X command word */
  rc = mx_read_pci_config_16(is,
                             cap_off + offsetof(mx_pci_capability_t,
                                                pci_x.Command),
                             &cmd);
  if (rc != 0) {
    MX_WARN (("Could not read pci_x.Command\n"));
    return rc;
  }

  /* the read byte count the BIOS or OS programmed */
  rbc = cmd & MX_PCI_CAP_PCI_X_COMMAND_MAX_MEM_READ_BYTE_CNT_MASK;

  /* complain about anything below the optimum */
  if (rbc < MX_PCI_CAP_PCI_X_COMMAND_MAX_MEM_READ_BYTE_CNT_4096) {
    MX_INFO(("Board %d: BIOS or OS set PCI-X max memory read byte count < 4KB\n", is->id));
  }

  /* Only values below 2048 are considered unacceptable.  4096 byte
     reads would be preferable, but they are known to be unreliable
     with some chipsets; according to the authors' tests 2048 costs
     about 5% of bidirectional bandwidth and nothing unidirectionally. */
  if (!(MX_PCI_X_FORCE_LARGE_READ &&
        (rbc < MX_PCI_CAP_PCI_X_COMMAND_MAX_MEM_READ_BYTE_CNT_2048)))
    return 0;

  /* remember what was there so it can be restored later */
  is->pci_cap.command_tweaked = 1;
  is->pci_cap.command_offset =
    cap_off + offsetof(mx_pci_capability_t, pci_x.Command);
  is->pci_cap.command_orig_val = cmd;

  /* clear the old byte count field, then set the 2048 encoding */
  cmd ^= rbc;
  cmd ^= MX_PCI_CAP_PCI_X_COMMAND_MAX_MEM_READ_BYTE_CNT_2048;

  MX_WARN(("Board %d: Forcing PCI-X max_mem_read_byte_cnt to 2KB\n", is->id));

  /* store the updated command word */
  rc = mx_write_pci_config_16(is, is->pci_cap.command_offset, cmd);
  if (rc != 0) {
    MX_WARN (("Board %d: Could not write pci_x.Command\n", is->id));
    return rc;
  }

  return 0;
}


/*
 * Clear the bits given by value in the PCI command register, wait, and
 * read the register back.
 *
 * Returns the error of the first failing read or write; otherwise the
 * status of the final read-back.  The value read back is not examined.
 */
static int
mx_disable_pci_config_command_bit(mx_instance_state_t * is,  uint16_t value)
{
  uint16_t reg;
  int rc;

  rc = mx_read_pci_config_16(is, offsetof(mx_pci_config_t, Command), &reg);
  if (rc != 0) {
    MX_NOTE (("Could not read PCI command register.\n"));
    return rc;
  }

  reg &= ~(value);
  rc = mx_write_pci_config_16(is, offsetof(mx_pci_config_t, Command), reg);
  if (rc != 0) {
    MX_NOTE (("Could not write PCI command register.\n"));
    return rc;
  }

  /* Pause for at least 10ms */
  mx_spin (15000);

  return mx_read_pci_config_16(is, offsetof(mx_pci_config_t, Command), &reg);
}

/*
 * Set the bits given by value in the PCI command register, wait, then
 * read the register back and verify that the bits stuck.
 *
 * Returns 0 on success, the error of a failing config-space access, or
 * EIO when the register does not show all requested bits afterwards.
 */
static int
mx_enable_pci_config_command_bit(mx_instance_state_t *is,  uint16_t value)
{
  uint16_t reg;
  int rc;

  rc = mx_read_pci_config_16(is, offsetof(mx_pci_config_t, Command), &reg);
  if (rc != 0) {
    MX_WARN (("Board %d: Could not read PCI command register.\n",
              is->id));
    return rc;
  }

  reg |= value;
  rc = mx_write_pci_config_16(is, offsetof(mx_pci_config_t, Command), reg);
  if (rc != 0) {
    MX_WARN (("Board %d: Could not write PCI command register.\n",
              is->id));
    return rc;
  }

  /* Pause for at least 10ms */
  mx_spin (15000);

  rc = mx_read_pci_config_16(is, offsetof(mx_pci_config_t, Command), &reg);
  if (rc != 0) {
    MX_WARN (("Board %d: Could not read PCI command register.\n",
              is->id));
    return rc;
  }

  if ((reg & value) == value)
    return 0;

  MX_WARN (("Board %d: Couldn't set pci config command bit 0x%x\n",
            is->id, value));
  return EIO;
}


/*
 * Drain the firmware (MCP) print ring that lives in LANai SRAM.
 *
 * Characters are copied from SRAM offset is->mcp_print_addr, starting
 * at ring index is->mcp_print_idx and stopping when the index reaches
 * idx, into the host staging buffer is->mcp_print_buffer.  Every
 * newline-terminated piece, and finally whatever remains, is emitted
 * through MX_PRINT.  Afterwards is->mcp_print_idx is set to idx and the
 * print limit word in SRAM (at is->mcp_print_limit_addr) is rewritten.
 *
 * Nothing is printed when is->mcp_print_len is 0; an idx at or beyond
 * is->mcp_print_len only produces a warning.  The whole operation runs
 * under mx_lanai_print_spinlock.
 */
void
mx_lanai_print(mx_instance_state_t *is, int idx)
{
  char *c;
  int newline = 0;
  uint32_t *mcp_print_limit;
  unsigned long flags;

  flags = 0;  /* useless initialization to pacify -Wunused */

  mx_spin_lock_irqsave(&mx_lanai_print_spinlock, flags);
  c = is->mcp_print_buffer;
  if (is->mcp_print_len && idx < is->mcp_print_len) {
    do {
      /* the previous character ended a line: terminate the staged
	 text, print it, and start filling the buffer from the top */
      if (newline) {
	*c = '\0';
	c = is->mcp_print_buffer;
	MX_PRINT(("LANai[%d]: %s", is->id, c));
      }
      /* copy one character out of the ring in SRAM */
      *c = ((char *)is->lanai.sram + is->mcp_print_addr)[is->mcp_print_idx];
      newline = (*c == '\n');
      c++;
      /* advance the ring index, wrapping at the ring length */
      is->mcp_print_idx++;
      if (is->mcp_print_idx >= is->mcp_print_len)
	is->mcp_print_idx = 0;
    } while (idx != is->mcp_print_idx);
    /* print whatever is staged after the last newline handled above */
    *c = '\0';
    MX_PRINT(("LANai[%d]: %s", is->id, is->mcp_print_buffer));
    is->mcp_print_idx = idx;
    /* the new limit is the slot just before idx (wrapping to the last
       slot when idx is 0), stored in network byte order */
    mcp_print_limit = (uint32_t*)((char *)is->lanai.sram + is->mcp_print_limit_addr);
    if (idx == 0) {
      *mcp_print_limit = htonl(is->mcp_print_len - 1);
    }
    else {
      *mcp_print_limit = htonl(idx - 1);
    }
    MX_STBAR();
  } else if (is->mcp_print_len && idx >= is->mcp_print_len) {
    MX_WARN(("mx%d: print interrupt with invalid index %d, max %d\n",
	     is->id, idx, is->mcp_print_len - 1));
  }
  mx_spin_unlock_irqrestore(&mx_lanai_print_spinlock, flags);
}

/*
 * The eeprom strings on the lanaiX are a sequence of NUL-terminated
 * strings, ended by an empty string.  They have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 * (the parser below matches the last two tags as "PT=" and "PV=").
 */

/* Nonzero when the len-byte tag lies entirely inside the buffer ending
   at end and the string at ptr starts with it. */
static int
mx_eeprom_tag_match(const char *ptr, const char *end, const char *tag, int len)
{
  return (end - ptr) >= len && memcmp(ptr, tag, len) == 0;
}

/*
 * Parse the eeprom strings saved in is->lanai.eeprom_strings.
 *
 * Fills in is->lanai.serial (SN=), is->mac_addr[] and
 * is->mac_addr_string (MAC=), is->lanai.product_code (PC=) and
 * is->lanai.part_number (PN=).  The PT=, PV=, PWR= and TAG= entries are
 * recognized and ignored; anything else is logged and skipped.  The
 * string pointers that are stored point into the eeprom buffer itself.
 *
 * The buffer comes from the device, so every scan is confined to its
 * MX_EEPROM_STRINGS_LEN bytes.
 */
void
mx_parse_eeprom_strings(mx_instance_state_t *is)
{
  char *ptr = is->lanai.eeprom_strings;
  char *end = ptr + MX_EEPROM_STRINGS_LEN;
  int i, hv, lv;

  while (ptr < end && *ptr != '\0') {
    if (mx_eeprom_tag_match(ptr, end, "SN=", 3)) {
      sscanf(ptr + 3, "%d", &is->lanai.serial);
    }
    else if (mx_eeprom_tag_match(ptr, end, "MAC=", 4)) {
      ptr += 4;
      is->mac_addr_string = ptr;
      for (i = 0; i < 6; i++) {
        /* need room for up to two digits plus a separator */
        if (end - ptr <= 2)
          goto abort;

        if (*(ptr+1) == ':') {
          /* single-digit byte */
          hv = 0;
          lv = mx_digit(*ptr); ptr++;
        }
        else {
          hv = mx_digit(*ptr); ptr++;
          lv = mx_digit(*ptr); ptr++;
        }
        is->mac_addr[i] = (hv << 4) | lv;
        ptr++;  /* skip the ':' (or the NUL after the last byte) */
      }
      /* step back onto the terminating NUL so that the skip below
         finds the end of this string */
      ptr--;
    }
    else if (mx_eeprom_tag_match(ptr, end, "PC=", 3)) {
      is->lanai.product_code = ptr + 3;
    }
    else if (mx_eeprom_tag_match(ptr, end, "PN=", 3)) {
      is->lanai.part_number = ptr + 3;
    }
    else if (mx_eeprom_tag_match(ptr, end, "PT=", 3)) {
    }
    else if (mx_eeprom_tag_match(ptr, end, "PV=", 3)) {
    }
    else if (mx_eeprom_tag_match(ptr, end, "PWR=", 4)) {
    }
    else if (mx_eeprom_tag_match(ptr, end, "TAG=", 4)) {
    }
    else {
      MX_WARN(("skipping unknown eeprom string %s\n", ptr));
    }

    /* advance to the first character after this string's NUL */
    while (ptr < end && *ptr++ != '\0')
      ;
  }
  /* do not hand a null pointer to %s when no MAC= entry was found */
  MX_INFO (("Board %d: MAC address = %s\n", is->id,
            is->mac_addr_string ? is->mac_addr_string : "(none)"));
  return;
 abort:
  MX_WARN (("Failed to parse eeprom strings\n"));
  MX_WARN (("strings = %p, ptr = %p\n",is->lanai.eeprom_strings, ptr));
}

/*
 * Take the board out of service: mark the instance dead, turn its
 * interrupt off and park it through the board_ops hooks, then release
 * the saved eeprom strings.
 */
static void
mx_freeze_board(mx_instance_state_t *is)
{
  /* Leave the CPU in reset, just in case firmware
     has gone nuts */
  is->flags |= MX_IS_DEAD;
  is->board_ops.disable_interrupt(is);
  is->board_ops.park(is);

  if (!is->lanai.eeprom_strings)
    return;

  mx_kfree(is->lanai.eeprom_strings);
  is->lanai.eeprom_strings = NULL;
}

/*
 * Log as much as can be learned about a firmware (MCP) failure: the PCI
 * status word (and, where available, the bridge's secondary status),
 * the MCP's own status, and any firmware printf output that has not
 * been flushed yet.
 *
 * On a Z board that is already marked dead only the PCI status is
 * reported.
 */
void
mx_parse_mcp_error(mx_instance_state_t *is)
{
  uint32_t rc, fw_state, print_pos;
  uint16_t pci_stat = 0xffff;

  mx_read_pci_config_16(is, offsetof(mx_pci_config_t, Status), &pci_stat);
  MX_WARN(("PCI-status=0x%04x\n", pci_stat));
#ifdef MX_HAS_BRIDGE_PCI_SEC_STATUS
  pci_stat = mx_bridge_pci_sec_status(is);
  MX_WARN(("BRIDGE PCI-sec-status=0x%04x\n", pci_stat));
#endif

  if (is->board_type == MX_BOARD_TYPE_Z && mx_is_dead(is)) {
    MX_WARN(("Cannot parse error under dead ze\n"));
    return;
  }

  /* ask the firmware image what state it thinks it is in */
  rc = mx_mcpi.get_param(is->id, is->lanai.sram, "mcp_status",
                         &fw_state);
  if (rc != 0) {
    MX_WARN (("Can't get MCP Status, error = %d\n", rc));
    return;
  }

  if (fw_state == MX_MCP_STATUS_ERROR) {
    MX_WARN (("A fatal error occured in the firmware !\n"));
  } else if (fw_state == MX_MCP_STATUS_LOAD) {
    MX_WARN (("The firmware died before the initialization phase "
              "(status is MX_MCP_STATUS_LOAD) !\n"));
  } else if (fw_state == MX_MCP_STATUS_INIT) {
    MX_WARN (("The firmware died during the initialization phase "
              "(status is MX_MCP_STATUS_INIT) !\n"));
  } else if (fw_state == MX_MCP_STATUS_RUN) {
    MX_WARN (("The firmware died after the initialization phase "
              "(status is MX_MCP_STATUS_RUN) !\n"));
  } else if (fw_state == MX_MCP_STATUS_PARITY) {
    MX_WARN (("The firmware stopped after a SRAM parity error "
              "(status is MX_MCP_STATUS_PARITY) !\n"));
  } else {
    MX_WARN (("The firmware died for an unknown reason (status is 0x%x).\n",
              fw_state));
  }

  /* flush firmware prints the host has not consumed yet */
  rc = mx_mcpi.get_param(is->id, is->lanai.sram, "print_buffer_pos",
                         &print_pos);
  if (rc == 0 && print_pos != is->mcp_print_idx) {
    MX_WARN(("Dumping LANai printf buffer on instance %d:\n", is->id));
    mx_lanai_print(is, print_pos);
  }
}

/*
 * Map the board, start the firmware (MCP) and hand it its parameters.
 *
 * Sequence: disable bus-master DMA, map the board, run board_ops.init(),
 * wait for the MCP to leave the LOAD state, re-enable bus-master DMA,
 * push the MAC address and the table sizes, set up firmware printf
 * support, signal "params_ready", wait for the init-done interrupt and
 * finally look up the command queue, kernel request queue, uptime
 * counter and host query page inside NIC SRAM.
 *
 * endpoint_bitmap is passed to the MCP as "endpt_recovery".
 *
 * Returns 0 on success or an errno value.  Once the board has been
 * mapped, every failure goes through the abort path, which freezes the
 * board (this also frees is->lanai.eeprom_strings), releases the
 * firmware printf staging buffer and unmaps the board.  The two
 * failures that can happen before the board is mapped return directly
 * and leave is->lanai.eeprom_strings untouched.
 */
static int
mx_init_board(mx_instance_state_t *is, uint32_t endpoint_bitmap)
{
  int ms, status, mac_xfer;
  uint32_t mcp_status, cmdoffset, kreqoffset,  kreqq_slots, uptime_offset;
  uint32_t host_query_vpage;


  /* disable busmaster DMA while we fiddle with the board */
  status = mx_disable_pci_config_command_bit(is, MX_PCI_COMMAND_MASTER);
  if (status) {
    MX_WARN(("could not disable PCI busmaster DMA\n"));
    return(status);
  }

  status = is->board_ops.map(is);
  if (status) {
    MX_WARN(("mx%d: Could not map the board\n", is->id));
    return status;
  }

  MX_INFO (("Board %d: device %x, rev %d, %d ports and %d bytes of SRAM available\n",
            is->id, is->pci_devid, is->pci_rev, is->num_ports,
            is->sram_size - MX_EEPROM_STRINGS_LEN));

  status = is->board_ops.init(is);
  if (status) {
    goto abort;
  }

  /* poll until the MCP leaves the LOAD state or we run out of time */
  ms = 0;
  do {
    mx_sleep(&is->init_sync, MX_SMALL_WAIT, MX_SLEEP_NOINTR);
    ms += MX_SMALL_WAIT;
    status = mx_mcpi.get_param(is->id, is->lanai.sram, "mcp_status", 
                               &mcp_status);
    if (status) {
      MX_WARN (("Can't get MCP Status, error = %d\n", status));
      goto abort;
    } 
  } while (mcp_status == MX_MCP_STATUS_LOAD && ms <= MX_MCP_INIT_TIMEOUT);
  
  if (mcp_status != MX_MCP_STATUS_INIT) {
    MX_WARN (("Timed out waiting for MCP to start, mcp_status = 0x%x\n", 
              mcp_status));
    mx_parse_mcp_error(is);
    status = ENXIO;
    goto abort;
  }

  /* enable busmaster DMA now that mcp is loaded */
  status = mx_enable_pci_config_command_bit(is, MX_PCI_COMMAND_MASTER);
  if (status) {
    MX_WARN(("could not enable PCI busmater DMA\n"));
    goto abort;
  }

  MX_DEBUG_PRINT (MX_DEBUG_BOARD_INIT, ("Setting API Version\n"));
  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, 
                    "driver_api_version", MX_MCP_DRIVER_API_VERSION);


  /* the MAC address is handed over in two splits: 32+16 and 16+32 */
  mac_xfer = ((is->mac_addr[0] << 24) | (is->mac_addr[1] << 16) 
              | (is->mac_addr[2] << 8) | is->mac_addr[3]);
  status = mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, 
                             "mac_high32", mac_xfer);
  if (status) {
    MX_WARN(("could not setup mac_high32\n"));
    goto abort;
  }
  
  mac_xfer = ((is->mac_addr[4] << 8) | is->mac_addr[5]);
  status = mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, 
                             "mac_low16", mac_xfer);
  if (status) {
    MX_WARN(("could not setup mac_low16\n"));
    goto abort;
  }
  
  mac_xfer = ((is->mac_addr[0] << 8) | is->mac_addr[1]);
  status = mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, 
                             "mac_high16", mac_xfer);
  if (status) {
    MX_WARN(("could not setup mac_high16\n"));
    goto abort;
  }
  
  mac_xfer = ((is->mac_addr[2] << 24) | (is->mac_addr[3] << 16) 
              | (is->mac_addr[4] << 8) | is->mac_addr[5]);
  status = mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, 
                             "mac_low32", mac_xfer);
  if (status) {
    MX_WARN(("could not setup mac_low32\n"));
    goto abort;
  }

  status = mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "endpt_recovery", 
                             endpoint_bitmap);
  if (status) {
    MX_WARN(("could not setup endpt_recovery\n"));
    goto abort;
  }

  
  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "nodes_cnt", 
                    mx_max_nodes);

  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "endpoints_cnt", 
                    mx_max_endpoints);

  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "send_handles_cnt", 
                    mx_max_send_handles);

  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "pull_handles_cnt", 
                    mx_max_pull_handles);

  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "push_handles_cnt", 
                    mx_max_push_handles);

  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "rdma_windows_cnt", 
                    mx_max_rdma_windows);

  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "random_seed", 
                    (uint32_t)mx_rand());

  mx_mcpi.set_param(is->id, (volatile uint8_t *)NULL, "intr_coal_delay", 
                    mx_intr_coal_delay);

  MX_DEBUG_PRINT (MX_DEBUG_BOARD_INIT, ("Dumping Globals\n"));
  mx_mcpi.set_globals(is->id, is->lanai.sram);

  mx_globals(is)->bogus_page.low = htonl(is->bogus.pin.dma.low);
  mx_globals(is)->bogus_page.high = htonl(is->bogus.pin.dma.high);
  mx_globals(is)->pcie_down_on_error = htonl(mx_pcie_down_on_error);
  
  /* printf support */
  status = mx_mcpi.get_param(is->id, is->lanai.sram, 
                             "MX_MCP_PRINT_BUFFER_SIZE", 
                             &is->mcp_print_len);
  status |= mx_mcpi.get_param(is->id, is->lanai.sram, "print_buffer_addr", 
                              &is->mcp_print_addr);
  status |= mx_mcpi.get_param(is->id, is->lanai.sram, "print_limit_addr", 
                              &is->mcp_print_limit_addr);

  /* firmware printf is optional: on any failure run without it */
  if (status) {
    is->mcp_print_len = 0;
    MX_WARN(("Can't determine location or size of MCP printf buffer\n"));
  } else {
    is->mcp_print_buffer = mx_kmalloc(is->mcp_print_len + 1,
                                      MX_MZERO | MX_WAITOK);
    if (is->mcp_print_buffer == NULL) {
      MX_WARN(("Can't allocate MCP host printf buffer\n"));
      is->mcp_print_len = 0;
    }
  }

  if (is->mcp_print_len != 0) {
    uint32_t *mcp_print_limit;
    mcp_print_limit = (uint32_t*)((char *)is->lanai.sram + is->mcp_print_limit_addr);
    *mcp_print_limit = htonl(is->mcp_print_len - 1);
    MX_STBAR();
  }

  is->intr.intrq = 0;
  is->intr.seqnum = 0;

  /* let the MCP know we've finished setting things up, and that it
     may now send us an interrupt */

  status = mx_mcpi.set_param(is->id, is->lanai.sram, "params_ready", 1);
  if (status != 0) {
      MX_WARN (("Can't set MCP params_ready, error = %d\n", status));
      goto abort;
  }

  MX_DEBUG_PRINT (MX_DEBUG_BOARD_INIT, ("run, baby, run!\n"));

  status = mx_sleep(&is->init_sync, MX_MCP_INIT_TIMEOUT, MX_SLEEP_NOINTR);
  if (status) {
    /* the sleep failed; this only counts as an error when the first
       interrupt queue slot does not hold the init-done event */
    if (is->intr.q[0][0].type != MX_MCP_INTR_INIT_DONE) {
      MX_WARN (("Timed out waiting for MX_MCP_INTR_INIT_DONE\n"));
      if (is->board_type < MX_BOARD_TYPE_Z)
        mx_parse_mcp_error(is);
      status = ENXIO;
      goto abort;
    }
  }
  
  if (is->board_type == MX_BOARD_TYPE_Z && MX_DEBUG) {
    uint32_t count_offset;
    mx_mcpi.get_param(is->id, is->lanai.sram, "counters_offset", &count_offset);
    MX_INFO(("counters are at offset 0x%x in SRAM\n", count_offset));
  }
  /* find the command queues */
  status = mx_mcpi.get_param(is->id, is->lanai.sram, "Command queue offset", &cmdoffset);
  if (status) {
    MX_WARN (("Could not locate command queue in NIC SRAM, status %d\n", status));
    goto abort;
  }

  is->cmdq.mcp = (mcp_slot_t *)(is->lanai.sram + cmdoffset);
  is->cmdq.submitted = is->cmdq.completed = 0;

  status = mx_mcpi.get_param(is->id, is->lanai.sram, "kreqq_offset", 
                             &kreqoffset);
  if (status) {
    MX_WARN (("Could not locate kernel request queue in NIC SRAM, status %d\n", status));
    goto abort;
  }

  is->kreqq = (mcp_kreq_t *)(is->lanai.sram + kreqoffset);
  is->kreqq_submitted = 0;
  is->kreqq_completed = 0;

  status = mx_mcpi.get_param(is->id, is->lanai.sram, "MX_MCP_KREQQ_CNT",
                             &kreqq_slots);

  if (status) {
    MX_WARN (("Can't determine number of kernel request queue slots\n"));
    /* the board is mapped and running at this point: tear it down like
       every other late failure instead of returning directly */
    goto abort;
  }
  is->kreqq_max_index = kreqq_slots - 1;

  status = mx_mcpi.get_param(is->id, is->lanai.sram, "MCP uptime offset", 
                             &uptime_offset);
  if (status) {
    /* not fatal: only the watchdog is lost */
    is->lanai_uptime_ptr = 0;
    MX_WARN (("Can't find uptime in lanai memory, no watchdog running\n"));
  } else {
    is->lanai_uptime_ptr = (uint32_t *)((char *)is->lanai.sram + uptime_offset);
  }

  status = mx_mcpi.get_param(is->id, is->lanai.sram, "host_query_vpage", 
                             &host_query_vpage);
  if (status) {
    MX_WARN(("Could not find location of host query vpage\n"));
    goto abort;
  }

  /* set the DMA addr of the host query vpage */
  MX_PIO_WRITE(&(((mcp_dma_addr_t *)(is->lanai.sram + host_query_vpage))->low), 
               htonl(is->host_query.pin.dma.low));
  MX_PIO_WRITE(&(((mcp_dma_addr_t *)(is->lanai.sram + host_query_vpage))->high),
               htonl(is->host_query.pin.dma.high));
  return 0;

 abort:
  mx_freeze_board(is);

  if (is->mcp_print_len != 0) {
    is->mcp_print_len = 0;
    mx_kfree(is->mcp_print_buffer);
    is->mcp_print_buffer = NULL;
  }

  if (is->lanai.sram)
    is->board_ops.unmap(is);

  return(status);
}


/*
 * Release everything mx_alloc_route() set up for one port's routes: the
 * command sync, the host route table and the offsets array.
 *
 * A route whose host_table is NULL is treated as never allocated (or
 * already freed) and is left alone.  The structure is cleared
 * afterwards, which makes calling this twice on the same route
 * harmless.
 */
static void
mx_free_route(mx_instance_state_t *is, mx_routes_t *routes)
{
  if (routes->host_table == NULL) {
    return;
  }

  mx_sync_destroy(&routes->cmd_sync);
  mx_kfree(routes->host_table);

  if (routes->offsets)
    mx_kfree(routes->offsets);

  /* clear the whole structure, not just sizeof(pointer) bytes of it;
     otherwise host_table can stay non-NULL and a second call would free
     the same memory again */
  bzero(routes, sizeof(*routes));
}

/*
 * Allocate the host-side route state for one port: a route table of
 * mx_max_nodes blocks of the firmware's route block size, the command
 * sync, and an array of mx_max_nodes offsets.
 *
 * The port argument is currently unused.
 *
 * Returns 0 on success; the error from the firmware parameter lookup;
 * ENXIO when the firmware reports a route block size that is not a
 * power of two; or ENOMEM when an allocation fails.  Nothing stays
 * allocated on failure.
 */
static int
mx_alloc_route(mx_instance_state_t *is, mx_routes_t *routes, uint32_t port)
{
  int status;
  uint32_t routeblock_size;

  status = mx_mcpi.get_param(is->id, NULL, 
                             "MX_MCP_ROUTE_BLOCK_SIZE", &routeblock_size);
  if (status) {
    MX_WARN(("Failed to determine routeblock size\n"));
    goto abort_with_nothing;
  }

  if (!MX_IS_POWER_OF_TWO(routeblock_size)) {
    MX_WARN(("Routeblock size %d is not a power of two!\n", routeblock_size));
    /* status is still 0 here; without an explicit error the caller
       would treat an unallocated route as a success */
    status = ENXIO;
    goto abort_with_nothing;
  }

  routes->block_size = routeblock_size;
  routes->host_table = mx_kmalloc(mx_max_nodes * routeblock_size, 
                                  MX_MZERO|MX_WAITOK);
  
  if (!routes->host_table) {
    MX_WARN(("Failed to allocate memory for host route table\n"));
    status = ENOMEM;
    goto abort_with_nothing;
  }
  
  mx_sync_init(&routes->cmd_sync, is, -1, "route cmd sync");
  routes->offsets = mx_kmalloc(mx_max_nodes * sizeof(routes->offsets[0]),
                               MX_MZERO|MX_WAITOK);
  
  if (!routes->offsets) {
    MX_WARN(("Failed to allocate memory for route table offsets\n"));
    status = ENOMEM;
    goto abort_with_alloc;
  }

  return 0;
  
 abort_with_alloc:
  mx_free_route(is, routes);

 abort_with_nothing:
  return status;
}


/*
 * Free the route state of every port and then the is->routes array
 * itself.  Does nothing when the array was never allocated.
 */
static void
mx_free_routes(mx_instance_state_t *is)
{
  int port;

  if (is->routes == NULL)
    return;

  port = 0;
  while (port < is->num_ports) {
    mx_free_route(is, is->routes + port);
    port++;
  }

  mx_kfree(is->routes);
  is->routes = NULL;
}

/*
 * Allocate is->routes with one zeroed entry per port and set each entry
 * up through mx_alloc_route().
 *
 * Returns 0 on success, ENOMEM when the array cannot be allocated, or
 * the error of the first port that fails; in that case everything
 * allocated so far is released again.
 */
static int
mx_alloc_routes(mx_instance_state_t *is)
{
  int port, rc;

  is->routes = mx_kmalloc(is->num_ports * sizeof(is->routes[0]),
                          MX_MZERO|MX_WAITOK);
  if (is->routes == NULL)
    return ENOMEM;

  for (port = 0; port < is->num_ports; port++) {
    rc = mx_alloc_route(is, is->routes + port, port);
    if (rc == 0)
      continue;

    mx_free_routes(is);
    return rc;
  }

  return 0;
}

/*
 * For every port, ask the firmware (MX_MCP_CMD_GET_ROUTES_OFFSET) where
 * its route table sits in NIC SRAM and record the corresponding host
 * address in routes[port].mcp_table.
 *
 * Returns 0, or the error of the first command that fails.
 */
static int
mx_init_routes(mx_instance_state_t *is)
{
  uint32_t table_off;
  int port, rc;

  for (port = 0; port < is->num_ports; port++) {
    rc = mx_lanai_command(is, MX_MCP_CMD_GET_ROUTES_OFFSET,
                          port, 0, 0, &table_off,
                          &is->routes[port].cmd_sync);
    if (rc != 0) {
      MX_WARN (("Can't determine mcp route table offset \n"));
      return rc;
    }

    is->routes[port].mcp_table =
      ((char *)(is->lanai.sram + (unsigned long)table_off));
  }

  return 0;
}

/*
 * Release every DMA page that backs the RDMA window table and then the
 * table itself.  Entries without a page are skipped; nothing happens
 * when the table was never allocated.
 */
static void
mx_free_rdmawin_vpages(mx_instance_state_t *is)
{
  int n;

  if (!is->rdmawin_vpages)
    return;

  for (n = 0; n < is->rdmawin_vpages_count; n++) {
    struct mx_rdmawin_vpage *vp = &is->rdmawin_vpages[n];

    if (!vp->alloc_addr)
      continue;

    mx_free_dma_page(is, &vp->alloc_addr, &vp->pin);
    vp->alloc_addr = 0;
  }

  mx_kfree(is->rdmawin_vpages);
  is->rdmawin_vpages = 0;
}


/*
 * Allocate the RDMA window table: enough zeroed DMA pages to hold
 * mx_max_rdma_windows * mx_max_endpoints windows at
 * MX_RDMA_WIN_PER_PAGE windows per page (rounded up).
 *
 * Returns 0 on success, ENOMEM when the table cannot be allocated, or
 * the error of the failing page allocation; on failure all pages
 * obtained so far are released.
 */
static int
mx_alloc_rdmawin_vpages(mx_instance_state_t *is)
{
  int pages = ((mx_max_rdma_windows * mx_max_endpoints + MX_RDMA_WIN_PER_PAGE -1)
               / MX_RDMA_WIN_PER_PAGE);
  struct mx_rdmawin_vpage *vp;
  int n, rc;

  is->rdmawin_vpages_count = pages;
  is->rdmawin_vpages = mx_kmalloc(sizeof(is->rdmawin_vpages[0]) * pages, MX_MZERO);
  if (is->rdmawin_vpages == NULL)
    return ENOMEM;

  for (n = 0; n < pages; n++) {
    vp = &is->rdmawin_vpages[n];
    vp->alloc_addr = 0;
    rc = mx_alloc_zeroed_dma_page(is, &vp->alloc_addr,
                                  (char **)&vp->rdmawins,
                                  &vp->pin);
    if (rc == 0)
      continue;

    mx_free_rdmawin_vpages(is);
    return rc;
  }

  return 0;
}


/*
 * Allocate the two host interrupt queues and tell the firmware where
 * they are.
 *
 * Both queues share a single zeroed DMA page: queue 0 starts at the
 * beginning of the page and queue 1 follows it, maxslots entries later,
 * where maxslots is the firmware's "MX_MCP_INTRQ_SLOTS" parameter.
 *
 * Returns 0 on success or a nonzero error.  When setting the DMA
 * addresses fails the page is released again; note that the returned
 * value is then the bitwise OR of the individual set_param results.
 */
static int
mx_alloc_intrqs(mx_instance_state_t *is)
{
  uint32_t dma_addr, maxslots;
  int status;

  status = mx_mcpi.get_param(is->id, NULL, "MX_MCP_INTRQ_SLOTS",
                             &maxslots);
  if (status) {
    MX_WARN (("Can't determine number of interrupt queue slots\n"));
    return status;
  }
  /* both queues must fit in the one page allocated below */
  mx_always_assert((maxslots * sizeof(mcp_slot_t) * 2) <= PAGE_SIZE);

  status = mx_alloc_zeroed_dma_page(is, &is->intr.alloc_addr,
                                    (char **)&is->intr.q[0],
                                    &is->intr.pin);

  if (status) {
    MX_WARN(("Can't allocate interrupt queues\n"));
    return status;
  }
  is->intr.q[1] = is->intr.q[0] + maxslots;
  dma_addr = is->intr.pin.dma.low;
  status = mx_mcpi.set_param(is->id, NULL, 
                             "host_intr_queue[0].low", dma_addr);
  /* offset queue 1 by the same slot count used for the host pointer
     q[1] above, so the firmware and the host agree on its location */
  dma_addr += (uint32_t)(maxslots * sizeof(mcp_slot_t));
  status |= mx_mcpi.set_param(is->id, NULL, 
                              "host_intr_queue[1].low", dma_addr);
  status |= mx_mcpi.set_param(is->id, NULL, 
                              "host_intr_queue[0].high", 
                              is->intr.pin.dma.high);    
  status |= mx_mcpi.set_param(is->id, NULL, 
                              "host_intr_queue[1].high", 
                              is->intr.pin.dma.high);    
  if (status) {
    MX_WARN(("Could not set interrupt queue DMA addresses\n"));
    goto abort_with_alloc;
  }

  /* only set intr.maxslots after allocating the intrq.  This 
     can prevent a bad memory access in the interrupt handler
     with a shared irq */

  is->intr.maxslots = maxslots;
  return 0;

 abort_with_alloc:
  mx_free_dma_page(is, &is->intr.alloc_addr, &is->intr.pin);
  is->intr.alloc_addr = NULL;
  return status;
}

/*
 * Give back the DMA page holding the interrupt queues.  intr.maxslots
 * is zeroed first, before the page goes away.
 */
static void
mx_free_intrqs(mx_instance_state_t *is)
{
  is->intr.maxslots = 0;

  if (is->intr.alloc_addr == NULL)
    return;

  mx_free_dma_page(is, &is->intr.alloc_addr, &is->intr.pin);
  is->intr.alloc_addr = NULL;
}

/*
 * Bring up one board as instance number unit.
 *
 * Identifies the board from PCI config space, selects the matching
 * firmware, allocates the per-instance resources (host query page,
 * endpoint array, interrupt queues, bogus page, routes, RDMA window
 * pages, eeprom string buffer), initializes the board and its routes,
 * registers the instance in mx_instances[], adds peers, starts the
 * mapper and, on the OSes selected below, attaches the ethernet device.
 *
 * Returns 0 on success or an errno value.  On failure everything
 * acquired so far is released.
 *
 * NOTE(review): on success the function returns with is->sync held
 * (see the mx_mutex_enter() just before "return 0"); presumably the
 * caller releases it -- confirm against the per-OS attach code.
 */
int  
mx_instance_init (mx_instance_state_t *is, int32_t unit)
{
  int status;
  uint32_t controlq_slots;
  uint16_t vendor;
  uint16_t device;
  uint8_t cacheline_size;

  STAILQ_INIT(&is->cmdq.host_cmdq);
  STAILQ_INIT(&is->cmdq.mcp_cmdq);

  /* set the id first: the PCI helpers below print it in their warnings */
  is->id = unit;

  status = mx_enable_pci_config_command_bit(is, MX_PCI_COMMAND_MEMORY);
  if (status) {
    MX_WARN(("MX: Unable to enable PCI memory space access for board %d\n",
             unit));
    goto abort_with_nothing;
  }

  status = mx_read_pci_config_16(is, offsetof(mx_pci_config_t, 
                                              Vendor_ID), &vendor);
  if (status) {
    MX_WARN(("Could not determine board vendor id\n"));
    return ENXIO;
  }
  
  status = mx_read_pci_config_16(is, offsetof(mx_pci_config_t, 
                                              Device_ID), &device);
  if (status) {
    MX_WARN(("Could not determine board device id\n"));
    return ENXIO;
  }  
  
  status = mx_read_pci_config_8(is, offsetof(mx_pci_config_t, 
                                             Revision_ID), &is->pci_rev);
  if (status) {
    MX_WARN(("Could not determine board revision id\n"));
    return ENXIO;
  }

  status = mx_read_pci_config_16(is, offsetof(mx_pci_config_t, 
                                              Device_ID), &is->pci_devid);
  if (status) {
    MX_WARN(("Could not determine board device id\n"));
    return ENXIO;
  }
  
  status = mx_read_pci_config_8(is, offsetof(mx_pci_config_t, 
                                             Cache_Line_Size), 
                                &cacheline_size);
  if (status) {
    MX_WARN(("Could not determine board cache line size\n"));
    return ENXIO;
  }
  
  /* set the number of ports (used for managing routes) */

  status = mx_select_board_type(is, vendor, device, is->pci_rev);
  if (status != 0) {
    return ENXIO;
  }
  if (mx_mcpi.init(unit, is->board_type)) {
    MX_WARN(("Couldn't find appropriate firmware version for board %d (%d)\n", 
             unit, is->board_type));
    return ENXIO;
  }

  /* cachline size is reported in units of "dwords" (4 bytes), but we
     want it in bytes */
  if (mx_cacheline_size == 0)
    mx_cacheline_size = 4 * (uint32_t)cacheline_size;
  
  if (mx_cacheline_size != (uint32_t)sizeof(uint32_t) * (uint32_t)cacheline_size)
    MX_WARN(("Different boards are marked with different cacheline sizes? old = %d, new = %d\n",
             mx_cacheline_size, 4 * (uint32_t)cacheline_size));

  /* malloc the peer request buffer */
  status = mx_alloc_zeroed_dma_page(is, &is->host_query.alloc_addr,
                                    (char **)&is->host_query.buf, &is->host_query.pin);
  if (status) {
    MX_WARN(("Could not allocate query response buffer\n"));
    goto abort_with_nothing;
  }


  /* malloc the array of endpoints */
  is->es = mx_kmalloc 
    (sizeof(is->es[0]) * mx_max_endpoints, MX_MZERO);

  if (is->es == 0) {
    status = ENOMEM;
    MX_WARN(("Could not allocate the endpoints\n"));
    goto abort_with_host_query;
  }
    
  mx_sync_init(&is->logging.sync, is, -1, "logging sync");
  mx_sync_init(&is->dmabench.cmd_sync, is, -1, "dmabench cmd sync");
  mx_sync_init(&is->dmabench.wait_sync, is, -1, "dmabench wait sync");
  mx_spin_lock_init(&is->kreqq_spinlock, is, -1, "kreqq spinlock");

  mx_atomic_set(&is->ref_count, 0);

  
  status = mx_check_for_msi(is);
  if (status) {
    MX_WARN(("Could not check for MSI support\n"));
    goto abort_with_malloc;
  }
  status = mx_check_for_pcix_rbc(is);
  if (status) {
    MX_WARN(("Could not adjust PCIX RBC\n"));
    goto abort_with_malloc;
  }

  status = mx_mcpi.get_param(is->id, NULL, 
                             "MX_MCP_COMMANDQ_SLOTS", &controlq_slots);

  if (status) {
    MX_WARN (("Can't determine number of command queue slots\n"));
    goto abort_with_rbc;
  }
  is->cmdq.max_index = controlq_slots - 1;


  status = mx_alloc_intrqs(is);
  if (status) {
    MX_WARN(("Could not allocate the interrupts queues\n"));
    goto abort_with_rbc;
  }

  status = mx_alloc_dma_page(is, &is->bogus.alloc, &is->bogus.addr, &is->bogus.pin);
  if (status) {
    MX_WARN(("Could not allocate the bogus DMA page\n"));
    goto abort_with_intrqs;
  }

  status = mx_alloc_routes(is);
  if (status) {
    MX_WARN(("Could not allocate routes\n"));
    goto abort_with_bogus;
  }

  mx_sync_init(&is->sync, is, -1, "is->sync");
  mx_sync_init(&is->init_sync, is, -1, "is->init_sync");
  mx_spin_lock_init(&is->cmdq.spinlock, is, -1, "is->cmdq.spinlock");

  status = mx_alloc_rdmawin_vpages(is);
  if (status) {
    MX_WARN(("Could not allocate rdma windows\n"));
    goto abort_with_sync;
  }

  is->lanai.eeprom_strings = mx_kmalloc(MX_EEPROM_STRINGS_LEN, MX_NOWAIT);
  if (!is->lanai.eeprom_strings) {
    MX_WARN (("unable to allocate buffer to save eeprom strings\n"));
    /* status is still 0 from the call above; report the failure */
    status = ENOMEM;
    goto abort_with_rdmawin;
  }

  status = mx_init_board(is, 0);
  if (status) {
    MX_WARN(("Could not init the board\n"));
    /* mx_init_board() tears the board down itself and normally frees
       the eeprom strings too, but not when it fails before the board
       is mapped; abort_with_eeprom covers that case */
    goto abort_with_eeprom;
  }

  status = mx_init_routes(is);
  if (status) {
    MX_WARN(("Could not init routes\n"));
    /* the board is mapped and running: it must be stopped before the
       DMA pages it has been told about are freed */
    goto abort_with_board;
  }

  is->board_ops.get_freq(is);
  mx_mutex_enter(&mx_global_mutex);
  mx_mutex_enter(&is->sync);
  mx_instances[is->id] = is;
  mx_num_instances += 1;
  mx_mutex_exit(&is->sync);
  mx_mutex_exit(&mx_global_mutex);

  /* add ourself, and any peers which may have been found prior to
     this board being attached */
  mx_add_peers(is);

  mx_start_mapper(is);
#if MX_OS_LINUX || MX_OS_FREEBSD || MX_OS_UDRV
  status = mx_ether_attach(is);
  if (status) {
    /* not fatal: the instance stays up without the ethernet device */
    MX_WARN(("mx%d: failed to attach ethernet device, err=%d\n",
             is->id, status));
  }
#endif
  mx_mutex_enter(&is->sync);
  return 0;

 abort_with_board:
  /* same teardown as the abort path of mx_init_board() */
  mx_freeze_board(is);

  if (is->mcp_print_len != 0) {
    is->mcp_print_len = 0;
    mx_kfree(is->mcp_print_buffer);
    is->mcp_print_buffer = NULL;
  }

  if (is->lanai.sram)
    is->board_ops.unmap(is);

 abort_with_eeprom:
  /* already NULL when mx_freeze_board() has run */
  if (is->lanai.eeprom_strings) {
    mx_kfree(is->lanai.eeprom_strings);
    is->lanai.eeprom_strings = NULL;
  }

 abort_with_rdmawin:
  mx_free_rdmawin_vpages(is);

 abort_with_sync:
  mx_sync_destroy(&is->sync);
  mx_sync_destroy(&is->init_sync);
  mx_spin_lock_destroy(&is->cmdq.spinlock);

  mx_free_routes(is);

 abort_with_bogus:
  mx_free_dma_page(is, &is->bogus.alloc, &is->bogus.pin);

 abort_with_intrqs:
  mx_free_intrqs(is);

 abort_with_rbc:
  mx_restore_pci_cap(is);

 abort_with_malloc:
  mx_kfree(is->es);

  mx_sync_destroy(&is->logging.sync);
  mx_sync_destroy(&is->dmabench.cmd_sync);
  mx_sync_destroy(&is->dmabench.wait_sync);
  mx_spin_lock_destroy(&is->kreqq_spinlock);

 abort_with_host_query:
  mx_free_dma_page(is, &is->host_query.alloc_addr,  &is->host_query.pin);

 abort_with_nothing:

  return status;
}

/*
 * Tear down a board instance.
 *
 * Flags the instance dead, stops the mapper, force-completes whatever
 * is still sitting in the lanai command queue, and then -- provided
 * nobody holds a reference -- freezes the board and releases the
 * resources recorded in the instance state.
 *
 * Returns 0 on success, or EBUSY when is->ref_count is non-zero.
 *
 * NOTE(review): on the EBUSY path the instance has already been
 * flagged MX_IS_DEAD and its mapper stopped; nothing below undoes
 * that.  Confirm callers expect a busy board to stay marked dead.
 */
int  
mx_instance_finalize (mx_instance_state_t *is)
{
  /* flag the board dead before touching anything else */
  mx_mutex_enter(&is->sync);
  is->flags |= MX_IS_DEAD;
  mx_mutex_exit(&is->sync);

  mx_stop_mapper(is);

  /* Complete commands that are still queued.  The loop only ends once
     is->cmdq.len reaches zero, so it relies on
     mx_lanai_command_complete() shrinking the queue -- presumably one
     entry per call; verify against that function. */
  if (is->cmdq.len) {
    MX_INFO(("Board %d: command queue length at finalize is %d, completing..\n", 
	     is->id, is->cmdq.len));
    while(is->cmdq.len)
      mx_lanai_command_complete(is, 0xdead, MX_MCP_CMD_UNKNOWN, -1);
  }

  /* lock order: global mutex first, then the per-instance sync */
  mx_mutex_enter(&mx_global_mutex);
  mx_mutex_enter(&is->sync);

  /* refuse to free anything while references remain */
  if (mx_atomic_read(&is->ref_count)) {
    MX_WARN(("mx_instance_finalize: %ld refs remain returning EBUSY\n", 
	     (long)mx_atomic_read(&is->ref_count)));
    mx_mutex_exit(&is->sync);
    mx_mutex_exit(&mx_global_mutex);
    return EBUSY;
  }

#if MX_OS_LINUX || MX_OS_FREEBSD || MX_OS_UDRV
  mx_ether_detach(is);
#endif

  if (is->lanai.sram != NULL) {
    /*note -- this leaves the lanai in reset, so we can start to free
      things */
    mx_freeze_board(is);
  }

  mx_restore_pci_cap(is);

  if (is->host_query.alloc_addr) {
    mx_free_dma_page(is, &is->host_query.alloc_addr, &is->host_query.pin);
  }

  /* drop the instance from the global table while both locks are held */
  mx_free_rdmawin_vpages(is);
  mx_instances[is->id] = 0;
  mx_num_instances -= 1;
  mx_mutex_exit(&is->sync);
  mx_mutex_exit(&mx_global_mutex);
  mx_spin_lock_destroy(&is->cmdq.spinlock);

  mx_sync_destroy(&is->sync);
  mx_sync_destroy(&is->init_sync);
  /* The logging/dmabench syncs and the kreqq spinlock are destroyed
     only when the endpoint table exists; the init error path releases
     the same set together with is->es. */
  if (is->es) {
    mx_kfree(is->es);
    mx_sync_destroy(&is->logging.sync);
    mx_sync_destroy(&is->dmabench.cmd_sync);
    mx_sync_destroy(&is->dmabench.wait_sync);
    mx_spin_lock_destroy(&is->kreqq_spinlock);
  }
  is->es = 0;

  if (is->lanai.sram != NULL) {
    /* unmap the board */
    is->board_ops.unmap(is);
  }
  mx_free_dma_page(is, &is->bogus.alloc, &is->bogus.pin);
  mx_free_intrqs(is);
  mx_free_routes(is);

  /* NOTE(review): mcp_print_buffer is freed without a NULL check while
     saved_state.registers below is checked first -- confirm mx_kfree()
     accepts NULL. */
  is->mcp_print_len = 0;
  mx_kfree(is->mcp_print_buffer);
  is->mcp_print_buffer = NULL;
  /* register snapshot saved by mx_mark_board_dead(), if any */
  if (is->saved_state.registers != NULL) {
    mx_kfree(is->saved_state.registers);
    is->saved_state.registers = NULL;
    is->saved_state.num_registers = 0;
  }
  return 0;
}

/*
 * Translate an offset in an endpoint's mmap space into the kernel
 * address that backs it.
 *
 * The windows are laid out back to back, starting at offset 0:
 *   1. sendq              (MX_MEM_HOSTMEM)
 *   2. recvq              (MX_MEM_HOSTMEM)
 *   3. eventq             (MX_MEM_HOSTMEM)
 *   4. user mmapped sram  (MX_MEM_SRAM)
 *   5. ze req window      (MX_MEM_SRAM)
 *   6. kernel window      (MX_MEM_HOSTMEM, one page, only when
 *                          es->is->kernel_window is set)
 *
 * es:        endpoint whose mappings are consulted
 * req:       requested offset
 * kva:       out: for the three queues, the va recorded in the pin of
 *            the page containing req; for the sram windows, the window
 *            base plus the offset into it; for the kernel window,
 *            es->is->kernel_window
 * mem_type:  out: kind of memory behind *kva
 * pin:       out: pin of the page containing req.  Written for the
 *            three queue windows only; left untouched for the others.
 *
 * Returns 0 when req falls into one of the windows, EINVAL otherwise.
 *
 * Indexes and addresses are derived only after the window has been
 * matched, so no out-of-range pointer or page index is ever formed for
 * offsets that belong to a later window (or to none).
 */
int
mx_mmap_off_to_kva(mx_endpt_state_t *es, unsigned long req, void **kva,
		   int *mem_type, mx_page_pin_t **pin)
{
  unsigned long off = 0;	/* start of the window being examined */
  unsigned long end;		/* one past the end of that window */
  int index;			/* page index inside a queue window */

  /* mapping 1: the sendq */
  end = off + (unsigned long)es->sendq.size;
  if (req < end) {
    index = (req - off) / PAGE_SIZE;
    *kva = (void *)(uintptr_t)(es->sendq.pins[index].va);
    *pin = &es->sendq.pins[index];
    *mem_type = MX_MEM_HOSTMEM;
    return 0;
  }
  off = end;

  /* mapping 2: the recvq */
  end = off + (unsigned long)es->recvq.size;
  if (req < end) {
    index = (req - off) / PAGE_SIZE;
    *kva = (void *)(uintptr_t)(es->recvq.pins[index].va);
    *pin = &es->recvq.pins[index];
    *mem_type = MX_MEM_HOSTMEM;
    return 0;
  }
  off = end;

  /* mapping 3: the eventq */
  end = off + (unsigned long)es->eventq.size;
  if (req < end) {
    index = (req - off) / PAGE_SIZE;
    *kva = (void *)(uintptr_t)(es->eventq.pins[index].va);
    *pin = &es->eventq.pins[index];
    *mem_type = MX_MEM_HOSTMEM;
    return 0;
  }
  off = end;

  /* mapping 4: the mmaped sram */
  end = off + (unsigned long)es->user_mmapped_sram.size;
  if (req < end) {
    *kva = (void *)((char *)es->user_mmapped_sram.addr + (req - off));
    *mem_type = MX_MEM_SRAM;
    return 0;
  }
  off = end;

  /* mapping 5: the ze req window */
  end = off + (unsigned long)es->user_mmapped_zreq.size;
  if (req < end) {
    *kva = (void *)((char *)es->user_mmapped_zreq.addr + (req - off));
    *mem_type = MX_MEM_SRAM;
    return 0;
  }
  off = end;

  /* mapping 6: the kernel window (jiffies).  It is a single page and
     only matches a request for exactly its first byte: the test below
     holds when req equals the start of this window. */
  end = off + MX_PAGE_SIZE;
  if (req + MX_PAGE_SIZE == end && es->is->kernel_window) {
    *kva = es->is->kernel_window;
    *mem_type = MX_MEM_HOSTMEM;
    return 0;
  }

#ifdef notyet

  if (!es->privledged)
    return EPERM;

  /* mapping 7: offsets from (1U << 31) get the SRAM.  These windows
     are not contiguous with the ones above, so the lower bound has to
     be checked as well. */
  off = (1U << 31);
  end = off + (unsigned long)es->is->sram_size;
  if (req >= off && req < end) {
    *kva = (void *)((char *)es->is->lanai.sram + (req - off));
    *mem_type = MX_MEM_SRAM;
    return 0;
  }

  /* mapping 8: offsets from (1U << 31)+128M get the specials */
  off = (1U << 31) + 128*1024*1024;
  end = off + (unsigned long)es->is->specials_size;
  if (req >= off && req < end) {
    *kva = (void *)((char *)es->is->lanai.special_regs + (req - off));
    *mem_type = MX_MEM_SPECIAL;
    return 0;
  }

#endif

  return EINVAL;

}

/* Check the board for an SRAM parity error and deal with it.

   returns:
   - 1 if an error has been handled, either by marking
   the board dead (only strategy below) or by recovering the error
   - 0 if no parity error was present

   Only the "#if 1" branch is compiled.  The "#else" branch is a
   disabled implementation that tries to recover by handshaking with
   the firmware and reloading it; it is kept for reference. */
static int
mx_handle_parity_error(mx_instance_state_t *is)
{
#if 1
  if (is->board_ops.detect_parity_error(is) == 0)
    return 0;

  /* For now, a real parity error is not recovered: the board is
     marked dead right here (mx_mark_board_dead() also parks the NIC)
     and the error is reported as handled, so the watchdog does not
     additionally mark the board dead with a timeout reason */

  MX_WARN(("Parity error detected on board %d\n", is->id));
  mx_mark_board_dead(is, MX_DEAD_SRAM_PARITY_ERROR, 0);
  return 1;

#else
  /* Disabled recovery path.  It is entered with mx_global_mutex held,
     drops it while working and re-takes it before returning. */
  volatile uint64_t *mem;
  volatile uint64_t *end;
  volatile uint64_t dont_care;
  int status, ms, do_ether_reattach, i;
  int found_inside_critical = 0;
  int board_ok = 0;
  uint32_t parity_status, endpoint_bitmap;
  mx_sync_t tmp_sync;

  if (MX_DEBUG && mx_simulate_parity_error == is->id + 1) {
    /* grab some state and jump into the recovery code */
    mx_mutex_exit(&mx_global_mutex);  
    mx_sync_init(&tmp_sync, is, 0, "parity recovery temp sync");  
    goto simulate_parity_error;
  }

  if ((mx_read_lanai_special_reg_u32(is, lx.ISR) & MX_LX_PARITY_INT) == 0) 
    return 0;

  /* For now, if we have a real parity error, reset the lanai so it
     will drop packets, and return to the watchdog, who will mark the
     board dead */

  MX_WARN(("Parity error detected on board %d\n", is->id));
  mx_mark_board_dead(is, MX_DEAD_SRAM_PARITY_ERROR, 0);
  return 0;

  /* NOTE(review): because of the unconditional return above, the code
     from here down to the simulate_parity_error label cannot be
     reached; only the simulated-error goto gets past this point. */

  /* drop the global mutex */
  mx_mutex_exit(&mx_global_mutex);  

  MX_WARN(("Parity error detected on board %d\n", is->id));

  /* sleep for a little while to give the mcp a chance to notice it
     and start spinning on REQ_ACK_1 before we clear the parity error bit */
  mx_spin(20000);

  /* Check to see if mcp really was doing parity recovery by
     checking to see if REQ_ACK_1 is set */
  if ((mx_read_lanai_special_reg_u32(is, lx.ISR) & MX_LX_REQ_ACK_1) == 0) {
    /* Nope, so we're screwed.  The parity error could have
       been anything */
    MX_WARN(("\t Unrecoverable parity error without scrubber\n"));
    mx_always_assert(0);
  }


  /* block most ioctls, except for the progression thread */
  mx_mutex_enter(&is->sync);
  is->flags |= MX_PARITY_RECOVERY;
  mx_mutex_exit(&is->sync);


  /* Terminate pending lanai commands */
  if (is->cmdq.len) {
    MX_INFO(("Board %d: command queue length at partiy recovery is %d, completing..\n", 
	     is->id, is->cmdq.len));
    while(is->cmdq.len)
      mx_lanai_command_complete(is, 0xdead, MX_MCP_CMD_UNKNOWN, -1);
  }


  /* clear it */
  mx_write_lanai_special_reg_u32(is, lx.ISR, MX_LX_PARITY_INT);

  /* scan critical region and see what we've got */
  mem = (volatile uint64_t *)is->parity_critical_start;
  end = (volatile uint64_t *)is->parity_critical_end;
  while(mem != end) {
    /* the value is irrelevant; the read is what matters, the parity
       interrupt bit is tested right after it */
    dont_care = *mem;
    MX_STBAR();
#if MX_OS_UDRV
    if (mx_lxgdb && ((((uintptr_t) mem) - ((uintptr_t) is->lanai.sram)) & ~7) 
	== mx_read_lanai_special_reg_u32(is, lx.AISR))
#else
    if ((mx_read_lanai_special_reg_u32(is, lx.ISR) & MX_LX_PARITY_INT) != 0)
#endif
      {
	/* clear it */
	mx_write_lanai_special_reg_u32(is, lx.ISR, MX_LX_PARITY_INT);
	MX_WARN(("\t Parity error found at LANai SRAM address 0x%x\n",
		 (int)((uintptr_t)mem - (uintptr_t)is->lanai.sram)));
	found_inside_critical++;
      }
    mem++;
  }

  MX_WARN(("\t Scan completed:  found_inside_critical = %d\n",
	   found_inside_critical));

  if (found_inside_critical) {
    /* We're screwed.  Since there is an error in the mcp's scrubber,
       we can't recover.  */
    MX_WARN(("\t Unrecoverable parity error(s)\n"));
    mx_always_assert(0);
  }

  MX_WARN(("\t Handshaking with firmware for parity error recovery\n"));
  /* handshake with the mcp and let him figure it out */
  mx_write_lanai_isr(is, MX_LX_REQ_ACK_1);
  MX_STBAR();

  mx_sync_init(&tmp_sync, is, 0, "parity recovery temp sync");  

  /* poll parity_status in MX_SMALL_WAIT steps until the firmware
     reports a verdict or MX_MCP_INIT_TIMEOUT is exceeded */
  ms = 0;
  do {
    mx_sleep(&tmp_sync, MX_SMALL_WAIT, MX_SLEEP_NOINTR);
    ms += MX_SMALL_WAIT;
    status = mx_mcpi.get_param(is->id, is->lanai.sram, "parity_status", 
			       &parity_status);
    if (status) {
      MX_WARN (("Can't get MCP Status in parity reovery, error = %d\n", status));
      mx_always_assert(status == 0);
      
    } 
  } while (parity_status == MX_MCP_PARITY_NONE
	   && ms <= MX_MCP_INIT_TIMEOUT);

  mx_mcpi.set_param(is->id, is->lanai.sram, "parity_status", 0);

  if (parity_status == MX_MCP_PARITY_PANIC ||
      ms > MX_MCP_INIT_TIMEOUT) {
    MX_WARN(("\t Unrecoverable parity error detected\n"));
    mx_always_assert(0);
  }

  if (parity_status == MX_MCP_PARITY_IGNORE) {
    MX_WARN(("\t Parity error in unused SRAM.  Ignoring...\n"));
    board_ok = 1;
    goto pickup_mutex_and_return;
  }

  mx_always_assert(parity_status == MX_MCP_PARITY_REBOOT);

 simulate_parity_error:
  /* a simulated error skipped the code above, so flag recovery and
     put the lanai in reset here; the simulation request is one-shot */
  if (mx_simulate_parity_error) {
    mx_mutex_enter(&is->sync);
    is->flags |= MX_PARITY_RECOVERY;
    mx_mutex_exit(&is->sync);
    mx_lanai_reset_on(is);
  }
  mx_simulate_parity_error = 0;

  MX_WARN(("\t Recoverable Parity error:  Stopping mapper and ethernet\n"));

  do_ether_reattach = mx_ether_parity_detach(is);
  
  /* Kill any running mapper */
  mx_stop_mapper(is);

  mx_mutex_enter(&is->sync);
  is->parity_errors_detected++;
  mx_mutex_exit(&is->sync);
  /* maybe wake all endpoints..? */


  /* one bit per endpoint slot that is currently in use */
  for (endpoint_bitmap = 0, i = 0; i < mx_max_endpoints; i++) {
    if (is->es[i] != NULL)
      endpoint_bitmap |= (1 << i);
  }

  MX_WARN(("\t Reloading firmware, endpoint_bitmap = 0x%x \n", endpoint_bitmap));

  status = mx_init_board(is, endpoint_bitmap);
  if (status) {
    MX_WARN(("Could not re-init the board\n"));
    board_ok = 0;
    goto pickup_mutex_and_return;
  }

  mx_mutex_enter(&is->sync);
  is->flags &= ~MX_PARITY_RECOVERY;
  mx_mutex_exit(&is->sync);
  mx_add_peers(is);

  is->parity_errors_corrected++;

  MX_WARN(("\t Firwmare has been reloaded.  Re-starting kernel services \n"));
  /* re-start the mapper, and re-attach ethernet interface */

  mx_start_mapper(is);

  if (do_ether_reattach)
    mx_ether_parity_reattach(is);

  MX_WARN(("\t Finished recovering from parity error \n"));
  board_ok = 1;

 pickup_mutex_and_return:
  mx_sync_destroy(&tmp_sync);

  /* return to the caller with the global mutex held */
  mx_mutex_enter(&mx_global_mutex);  
  return board_ok;
#endif
}

void
mx_watchdog_body(void)
{
  int i;
  uint32_t uptime;
  mx_instance_state_t *is;
  static int limit = 10;

  mx_mutex_enter(&mx_global_mutex);
  for (i = 0; i < mx_max_instance; ++i) {
    uint16_t pci_status, pci_cmd_reg;
    is = mx_instances[i];
    if (!is || mx_is_dead(is) || !is->lanai_uptime_ptr)
	continue;
    if (mx_read_pci_config_16(is, offsetof(mx_pci_config_t, Status), &pci_status) != 0
	|| (pci_status & MX_PCI_STATUS_PERR)) {
      MX_WARN(("Board number %d has PCI parity error\n", is->id));
      mx_mark_board_dead(is, MX_DEAD_PCI_PARITY_ERROR, 0);
      continue;
    }
    if (pci_status & MX_PCI_STATUS_MABORT) {
      MX_WARN(("Board number %d has PCI Master Abort\n", is->id));
      mx_mark_board_dead(is, MX_DEAD_PCI_MASTER_ABORT, 0);
      continue;
    }
#ifdef MX_HAS_BRIDGE_PCI_SEC_STATUS
    if (mx_bridge_pci_sec_status(is) & MX_PCI_STATUS_PERR) {
      MX_WARN(("Board number %d: our PCI bridge is signalling a PCI parity error\n", is->id));
      mx_mark_board_dead(is, MX_DEAD_PCI_PARITY_ERROR, 1);
      continue;
    }
#endif
    if (mx_read_pci_config_16(is, offsetof(mx_pci_config_t, Command), &pci_cmd_reg) != 0
	|| pci_cmd_reg == 0xffff || !(pci_cmd_reg & MX_PCI_COMMAND_MASTER)) {
      MX_WARN(("Board number %d lost Master-Enable bit\n", is->id));
      mx_mark_board_dead(is, MX_DEAD_NIC_RESET, 0);
      continue;
    }

    uptime = ntohl(*is->lanai_uptime_ptr);
    if (uptime == 0) {
      limit--;
      if (limit > 0)
	continue;
    }
    if (uptime == is->old_lanai_uptime ||
	(MX_DEBUG && (mx_simulate_parity_error == i+1))) {
      MX_WARN(("Board number %d stopped with %d seconds uptime\n",
	       is->id, uptime));
      if (mx_handle_parity_error(is) == 0) {
#if MX_OS_UDRV
	if (mx_lxgdb)
	  MX_WARN(("Board number %d seems to be stopped, moving on though\n", 
		   is->id));
	else
#endif
	  mx_mark_board_dead(is, MX_DEAD_WATCHDOG_TIMEOUT, uptime);
      }
    }
    is->old_lanai_uptime = uptime;
  }
  mx_mutex_exit(&mx_global_mutex);
}

/*
 * The mcp died.  Record what can still be learned about the failure
 * and put the nic into a safe state.
 *
 * is:      the board that failed
 * reason:  MX_DEAD_* code, stored in is->saved_state.reason
 * arg:     extra detail for the reason, stored in is->saved_state.arg
 *
 * Only the first caller for a given board does any work; if the board
 * is already flagged dead the function returns immediately.
 */
void
mx_mark_board_dead(mx_instance_state_t *is, int reason, int arg)
{
  uint32_t reg_count;
  int already_dead;

  /* test and set the dead flag under the instance lock so that exactly
     one caller carries on past this point */
  mx_mutex_enter(&is->sync);
  already_dead = mx_is_dead(is) ? 1 : 0;
  if (!already_dead)
    is->flags |= MX_IS_DEAD;
  mx_mutex_exit(&is->sync);
  if (already_dead)
    return;

  /* timeouts are normally tuned so that a parity error is not taken
     for a timeout by the time we get here, but check anyway and say so
     if the reported reason hides one */
  if (reason != MX_DEAD_SRAM_PARITY_ERROR && is->board_ops.detect_parity_error(is)) {
    MX_WARN(("%d: NIC has SRAM Parity Error (but driver reason is %d)\n", is->id, reason));
  }

  /* Keep a snapshot of the registers for mx_dump: the first call with
     a NULL buffer only fills in the count, the second one fills the
     freshly allocated buffer */
  is->board_ops.dump_registers(is, NULL, &reg_count);
  is->saved_state.registers =
    mx_kmalloc(sizeof(*is->saved_state.registers) * reg_count,
               MX_WAITOK|MX_MZERO);
  if (is->saved_state.registers != NULL) {
    is->saved_state.num_registers = reg_count;
    is->board_ops.dump_registers(is, is->saved_state.registers, &reg_count);
  } else {
    MX_WARN(("Could not allocate space to dump registers\n"));
  }

  mx_parse_mcp_error(is);

  is->board_ops.park(is);

  is->saved_state.reason = reason;
  is->saved_state.arg = arg;

  /* Let the FMA know the board died: wake the raw endpoint if one is
     attached */
  mx_mutex_enter(&is->sync);
  if (is->raw.es) {
    mx_wake(&is->raw.sync);
  }
  mx_mutex_exit(&is->sync);
  MX_WARN(("Board number %d marked dead\n", is->id));

}

#ifndef MX_HAS_MAP_PCI_SPACE
/*
 * Fallback used when MX_HAS_MAP_PCI_SPACE is not defined.  Only BAR 0
 * can be mapped, by handing the request to mx_map_io_space(); a
 * request for any other BAR yields NULL.
 */
void *
mx_map_pci_space (mx_instance_state_t * is, int bar, uint32_t offset, uint32_t len)

{
  if (bar != 0)
    return NULL;

  return mx_map_io_space(is, offset, len);
}
#endif
